# -*- coding: utf-8 -*-
import scrapy
from copy import deepcopy


class TmSpider(scrapy.Spider):
    """Crawl Tmall search results for the query '%BA%EC%BE%C6' (GBK-encoded
    红酒, "red wine") and paginate through the listing pages.

    Yields one dict per product plus a follow-up Request for the next page.
    """
    name = 'tm'
    allowed_domains = ['tmall.com']
    start_urls = ['https://list.tmall.com/search_product.htm?&q=%BA%EC%BE%C6']

    def parse(self, response):
        """Extract product items from one listing page, then follow the next.

        :param response: listing-page response from list.tmall.com
        :yields: dict items (price/title/bargain/content/image_url) and a
                 ``scrapy.Request`` for the next page when one exists.
        """
        for div in response.xpath('//*[@id="J_ItemList"]/div'):
            item = {
                'price': div.xpath('./div/p[1]/em/text()').extract_first(),
                'title': div.xpath('./div/p[2]/a/@title').extract_first(),
                # Number of completed transactions (成交数量).
                'bargain': div.xpath('./div/p[3]/span[1]/em/text()').extract_first(),
                'content': div.xpath('./div/p[3]/span[2]/a/text()').extract_first(),
            }
            # Images are lazy-loaded: prefer @data-ks-lazyload, fall back to
            # @src. FIX: test for None *before* urljoin — urljoin(None) raises
            # TypeError, so the original fallback branch was unreachable.
            img = div.xpath('./div/div[1]/a/img/@data-ks-lazyload').extract_first()
            if img is None:
                img = div.xpath('./div/div[1]/a/img/@src').extract_first()
            item['image_url'] = response.urljoin(img) if img is not None else None
            yield item

        # Pagination: the counter element reads "current/total" (e.g. "3/100").
        # FIX: guard against a missing counter (extract_first() -> None would
        # have crashed .split with AttributeError).
        page_text = response.xpath('//b[@class="ui-page-s-len"]/text()').extract_first()
        if page_text is None:
            return
        current, _, total = page_text.partition('/')
        next_page = int(current) + 1
        try:
            total_pages = int(total)
        except ValueError:
            total_pages = 0
        # FIX: the original built the next-page URL but only executed a
        # Python 2 `print` statement (a SyntaxError in Python 3) and never
        # yielded a request, so the spider could not paginate. Also include
        # the final page (the old `!= 100` skipped page 100); the site caps
        # results at 100 pages (totalPage=100 in the URL).
        if next_page <= min(total_pages, 100):
            next_url = ('https://list.tmall.com/search_product.htm?type=pc'
                        '&q=%BA%EC%BE%C6&totalPage=100&jumpto={}#J_Filter'
                        ).format(next_page)
            yield scrapy.Request(next_url, callback=self.parse)